package com.suyunyou.comm.utils;

import java.util.Date;
import java.util.List;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.suyunyou.manager.enums.SpiderSiteType;
import com.suyunyou.manager.pojo.SpiderRule;
import com.suyunyou.manager.pojo.SpiderSite;
import com.suyunyou.manager.service.SpiderRuleService;
import com.suyunyou.manager.service.SpiderSiteService;
import com.suyunyou.spider.Spider;
import com.suyunyou.spider.data.FullData;
import com.suyunyou.spider.data.SiteData;
import com.suyunyou.spider.utils.RefreshRuleUtil;
import com.suyunyou.spider.utils.SpiderUtil;
import com.suyunyou.spider.utils.WeixinGzhUtil;
import com.system.comm.utils.FrameSpringBeanUtil;
import com.system.comm.utils.FrameStringUtil;
import com.system.comm.utils.FrameTimeUtil;

/**
 * Starts, periodically refreshes and restarts the crawler.
 *
 * <p>{@link #init()} launches the spider on a background thread and makes sure a
 * periodic task is scheduled that re-adds site entry pages according to each
 * site's refresh rule. {@link #reset()} stops the current spider and runs
 * {@link #init()} again.
 *
 * @author yuejing
 * @date 2016-06-26 17:27:21
 * @version V1.0.0
 */
public class RunSpiderUtil {

	private static final Logger LOGGER = LoggerFactory.getLogger(RunSpiderUtil.class);

	/** Number of core threads of the entry-page re-crawl scheduler. */
	private static final int RULE_TASK_POOL_SIZE = 3;
	/** Delay, in seconds, before the first re-crawl check runs. */
	private static final int RULE_TASK_INITIAL_DELAY_SECONDS = 20;
	/** Interval, in seconds, between two consecutive re-crawl checks. */
	private static final int RULE_TASK_PERIOD_SECONDS = 60;

	/** Spider created by the most recent {@link #init()} call; {@code null} before the first call. */
	private static Spider spider;

	/**
	 * Scheduler running the periodic entry-page re-crawl check. Created at most once;
	 * guarded by the class monitor (see {@link #handleSiteRuleTask()}).
	 */
	private static ScheduledExecutorService ruleScheduler;

	/**
	 * Initializes the crawler.
	 *
	 * <p>Creates a new {@link Spider}, then on a new thread resets the shared crawl
	 * data, loads the enabled sites and rules, and runs the spider if at least one
	 * enabled site was found. Finally makes sure the periodic entry-page re-crawl
	 * task is scheduled.
	 */
	public static void init() {
		spider = new Spider();
		new Thread(new Runnable() {
			@Override
			public void run() {
				FullData.reset();
				SpiderUtil.clearPluginsFetcherPage();
				if (initConfig()) {
					spider.run();
				} else {
					LOGGER.error("没有找到需要爬取的列表!");
				}
			}
		}).start();

		// Make sure the periodic re-crawl of site entry pages is running.
		handleSiteRuleTask();
	}

	/**
	 * Loads the enabled sites and fetch rules and registers them with the crawler.
	 *
	 * @return {@code true} if at least one enabled site was found, {@code false}
	 *         otherwise (in which case nothing is registered)
	 */
	private static boolean initConfig() {
		SpiderSiteService siteService = FrameSpringBeanUtil.getBean(SpiderSiteService.class);
		List<SpiderSite> sites = siteService.findEnable();
		if (sites == null || sites.isEmpty()) {
			return false;
		}
		SiteData.adds(sites);
		for (SpiderSite site : sites) {
			addSite(site);
		}

		SpiderRuleService ruleService = FrameSpringBeanUtil.getBean(SpiderRuleService.class);
		List<SpiderRule> rules = ruleService.findEnable();
		if (rules != null) {
			for (SpiderRule rule : rules) {
				SpiderUtil.addSiteFetcherPage(rule.getRegex(), rule.getTitleSelect(), rule.getContentSelect());
			}
		}
		return true;
	}

	/**
	 * Registers the entry URL of a site with the crawler.
	 *
	 * <p>WeChat official-account sites are special-cased: their entry link is
	 * resolved through {@link WeixinGzhUtil#gzhLink}; if no link can be resolved
	 * an error is logged and the site is skipped. A site without a type is
	 * handled as a regular site.
	 *
	 * @param site the site to register
	 */
	private static void addSite(SpiderSite site) {
		boolean weixinGzh = site.getType() != null
				&& SpiderSiteType.WEIXIN_GZH.getCode() == site.getType().intValue();
		if (!weixinGzh) {
			SpiderUtil.addSite(site.getUrl(), site.getSiteId());
			return;
		}

		String url = WeixinGzhUtil.gzhLink(site);
		if (FrameStringUtil.isEmpty(url)) {
			LOGGER.error("=======================================================\n\n"
					+ "==============================公众号[{}]爬取内容异常!\n\n"
					+ "==========================================================", site.getName());
			return;
		}
		SpiderUtil.addSite(url, site.getSiteId());
	}

	/**
	 * Schedules the periodic task that re-adds site entry pages when they are due.
	 *
	 * <p>The scheduler is created only once. {@link #reset()} re-enters
	 * {@link #init()}, and building a new scheduler on every call would leak the
	 * previous thread pool and run the same check several times per period.
	 */
	private static synchronized void handleSiteRuleTask() {
		if (ruleScheduler != null) {
			return;
		}
		final SpiderSiteService siteService = FrameSpringBeanUtil.getBean(SpiderSiteService.class);
		ScheduledExecutorService service = new ScheduledThreadPoolExecutor(RULE_TASK_POOL_SIZE, new ThreadFactory() {
			@Override
			public Thread newThread(Runnable r) {
				Thread thread = new Thread(r);
				LOGGER.info("初始重新爬取网站入口页的线程:{}", thread.getName());
				return thread;
			}
		});
		Runnable runnable = new Runnable() {
			@Override
			public void run() {
				// scheduleAtFixedRate suppresses every later execution once a run
				// throws, so nothing may escape from here.
				try {
					refreshDueSites(siteService);
				} catch (RuntimeException e) {
					LOGGER.error("Periodic entry-page re-crawl check failed", e);
				}
			}
		};
		service.scheduleAtFixedRate(runnable, RULE_TASK_INITIAL_DELAY_SECONDS, RULE_TASK_PERIOD_SECONDS,
				TimeUnit.SECONDS);
		ruleScheduler = service;
	}

	/**
	 * Runs one re-crawl check over all enabled sites. A failure on one site is
	 * logged and does not prevent the remaining sites from being processed.
	 *
	 * @param siteService service used to load the sites and persist their rule time
	 */
	private static void refreshDueSites(SpiderSiteService siteService) {
		List<SpiderSite> sites = siteService.findEnable();
		if (sites == null || sites.isEmpty()) {
			return;
		}
		for (SpiderSite site : sites) {
			try {
				refreshSiteIfDue(siteService, site);
			} catch (RuntimeException e) {
				LOGGER.error("Failed to re-crawl entry page of site [" + site.getSiteId() + "]", e);
			}
		}
	}

	/**
	 * Re-adds the entry page of a single site if its refresh rule says it is due.
	 *
	 * @param siteService service used to persist the rule time
	 * @param site        the site to check; sites without a rule are ignored
	 */
	private static void refreshSiteIfDue(SpiderSiteService siteService, SpiderSite site) {
		if (FrameStringUtil.isEmpty(site.getRule())) {
			return;
		}
		Date handleTime = FrameTimeUtil.getTime();
		if (site.getRuleTime() == null) {
			// First time this site is seen: start its refresh cycle now.
			site.setRuleTime(handleTime);
			siteService.updateRuleTime(site.getSiteId(), handleTime);
		}
		// Skip the site unless it is due according to its rule.
		if (!RefreshRuleUtil.isHandle(site, handleTime)) {
			return;
		}
		LOGGER.info("重新爬取网站入口页[" + site.getUrl() + "], 上次爬取时间[" + FrameTimeUtil.parseString(site.getRuleTime()) + "]/当前爬取时间[" + FrameTimeUtil.getStrTime() + "]");

		addSite(site);

		// Record when the entry page was last re-added.
		siteService.updateRuleTime(site.getSiteId(), handleTime);
	}

	/**
	 * Restarts the crawler: stops the current spider, if any, and initializes again.
	 *
	 * <p>TODO: in a distributed deployment this has to reset every service
	 * instance, not only the local one.
	 */
	public static void reset() {
		if (spider != null) {
			spider.stop();
		}
		init();
		LOGGER.info("重启爬虫服务");
	}
}